# -*- coding: utf8 -*-
import re
import common
from BeautifulSoup import BeautifulSoup

# Identifiers for this site plugin.
# NOTE(review): presumably read by the surrounding framework to select this
# module for a given URL -- confirm against the caller.
site_index = site_keyword = 'utaten'
site_url = 'http://utaten.com/'

# Sample lyric URLs, one for each URL shape that get_lyric() recognises:
# the "/lyric/<id>" path form and the "LID=<id>" query-string form.
test_url = 'http://utaten.com/lyric/jb50903142'
test_url2 = 'http://utaten.com/lyric/lyric.php?LID=jb10508040'

def get_lyric(url):
    """Fetch and tidy the lyric text for a utaten.com lyric URL.

    The lyric id (two lowercase letters followed by digits) is taken from
    *url*, where it must follow either ``LID=`` or ``/lyric/``.  The text is
    then downloaded from the site's ``load_text.php`` endpoint via
    ``common.get_url_content``, decoded as Shift-JIS, and cleaned up.

    Returns the cleaned lyric as a unicode string, or None when no lyric id
    can be found in *url*.
    """
    found = re.search('((LID=)|(/lyric/))(?P<lid>[a-z]{2}[0-9]+)', url)
    if not found:
        return None

    full_url = 'http://utaten.com/lyric/load_text.php?LID=' + found.group('lid')

    # get data
    raw = common.get_url_content(full_url)

    # Decode, then drop the first character of the payload.
    # NOTE(review): presumably a leading marker/status character -- confirm.
    text = unicode(raw, 'sjis')[1:]

    # Clean-up passes, applied in this exact order.
    substitutions = (
        # delete trailing spaces/tabs at the end of each line
        ('[ \t]+\n', '\n'),
        # move a tab-introduced remainder of a line onto its own line, in
        # parentheses (presumably the reading/annotation part -- confirm)
        ('\t *(.*)\n', r'\n(\1)\n'),
        # collapse any run of three or more spaces into a comma
        ('   +', ','),
    )
    for regex, replacement in substitutions:
        text = re.sub(regex, replacement, text)

    return text

